More grant-table code, and some related sundry improvements.
mfn++ )
{
page = &frame_table[mfn];
- page->u.inuse.domain = p;
+ page->u.inuse.domain = p;
page->u.inuse.type_info = 0;
- page->u.inuse.count_info = PGC_allocated | 1;
+ page->u.inuse.count_info = PGC_always_set | PGC_allocated | 1;
list_add_tail(&page->list, &p->page_list);
p->tot_pages++; p->max_pages++;
}
vm_assist_info[VMASST_TYPE_writable_pagetables].disable =
ptwr_disable;
+ for ( mfn = 0; mfn < max_page; mfn++ )
+ frame_table[mfn].u.inuse.count_info |= PGC_always_set;
+
/* Initialise to a magic of 0x55555555 so easier to spot bugs later. */
memset(machine_to_phys_mapping, 0x55, 4<<20);
mfn < virt_to_phys(&machine_to_phys_mapping[1<<20])>>PAGE_SHIFT;
mfn++ )
{
- frame_table[mfn].u.inuse.count_info = 1 | PGC_allocated;
- frame_table[mfn].u.inuse.type_info = 1 | PGT_gdt_page; /* non-RW */
- frame_table[mfn].u.inuse.domain = dom_xen;
+ frame_table[mfn].u.inuse.count_info |= PGC_allocated | 1;
+ frame_table[mfn].u.inuse.type_info = PGT_gdt_page | 1; /* non-RW */
+ frame_table[mfn].u.inuse.domain = dom_xen;
}
}
{
unsigned long l1v = l1_pgentry_val(l1e);
unsigned long pfn = l1_pgentry_to_pagenr(l1e);
+ struct pfn_info *page = &frame_table[pfn];
extern int domain_iomem_in_pfn(struct domain *d, unsigned long pfn);
if ( !(l1v & _PAGE_PRESENT) )
if ( unlikely(!pfn_is_ram(pfn)) )
{
+ /* SPECIAL CASE 1. Mapping an I/O page. */
+
/* Revert to caller privileges if FD == DOMID_IO. */
if ( d == dom_io )
d = current;
return 0;
}
+ if ( unlikely(!get_page_from_pagenr(pfn, d)) )
+ {
+ /* SPECIAL CASE 2. Mapping a foreign page via a grant table. */
+
+ int rc;
+ struct domain *e;
+ u32 count_info;
+ /*
+ * Yuk! Amazingly this is the simplest way to get a guaranteed atomic
+ * snapshot of a 64-bit value on IA32. x86/64 solves this of course!
+ * Basically it's a no-op CMPXCHG, to get us the current contents.
+ * No need for LOCK prefix -- we know that count_info is never zero
+ * because it contains PGC_always_set.
+ */
+ __asm__ __volatile__(
+ "cmpxchg8b %2"
+ : "=a" (e), "=d" (count_info),
+ "=m" (*(volatile u64 *)(&page->u.inuse.domain))
+ : "0" (0), "1" (0), "b" (0), "c" (0) );
+ if ( unlikely((count_info & PGC_count_mask) == 0) ||
+ unlikely(e == NULL) || unlikely(!get_domain(e)) )
+ return 0;
+ rc = gnttab_try_map(e, d, page, l1v & _PAGE_RW);
+ put_domain(e);
+ return rc;
+ }
+
if ( l1v & _PAGE_RW )
{
- if ( unlikely(!get_page_and_type_from_pagenr(
- pfn, PGT_writable_page, d)) )
+ if ( unlikely(!get_page_type(page, PGT_writable_page)) )
return 0;
- set_bit(_PGC_tlb_flush_on_type_change,
- &frame_table[pfn].u.inuse.count_info);
- return 1;
+ set_bit(_PGC_tlb_flush_on_type_change, &page->u.inuse.count_info);
}
- return get_page_from_pagenr(pfn, d);
+ return 1;
}
}
-static void put_page_from_l1e(l1_pgentry_t l1e)
+/*
+ * Drop the reference(s) taken by get_page_from_l1e() for PTE @l1e, on
+ * behalf of domain @d (the domain whose page table contained the entry).
+ */
+static void put_page_from_l1e(l1_pgentry_t l1e, struct domain *d)
 {
     struct pfn_info *page = &frame_table[l1_pgentry_to_pagenr(l1e)];
     unsigned long l1v = l1_pgentry_val(l1e);
+    /* NB. 'e' is the page's current owner, which may differ from 'd'. */
+    struct domain *e = page->u.inuse.domain;
     if ( !(l1v & _PAGE_PRESENT) || !pfn_is_ram(l1v >> PAGE_SHIFT) )
         return;
+    if ( unlikely(e != d) )
+    {
+        /*
+         * Unmap a foreign page that may have been mapped via a grant table.
+         * Note that this can fail for a privileged domain that can map foreign
+         * pages via MMUEXT_SET_FOREIGNDOM. Such domains can have some mappings
+         * counted via a grant entry and some counted directly in the page
+         * structure's reference count. Note that reference counts won't get
+         * dangerously confused as long as we always try to decrement the
+         * grant entry first. We may end up with a mismatch between which
+         * mappings and which unmappings are counted via the grant entry, but
+         * really it doesn't matter as privileged domains have carte blanche.
+         */
+        if ( likely(gnttab_try_unmap(e, d, page, l1v & _PAGE_RW)) )
+            return;
+        /* Assume this mapping was made via MMUEXT_SET_FOREIGNDOM... */
+    }
+
     if ( l1v & _PAGE_RW )
     {
         put_page_and_type(page);
         if ( unlikely(((page->u.inuse.type_info & PGT_type_mask) ==
                        PGT_ldt_page)) &&
              unlikely(((page->u.inuse.type_info & PGT_count_mask) != 0)) )
-            invalidate_shadow_ldt(page->u.inuse.domain);
+            invalidate_shadow_ldt(e);
         put_page(page);
     }
 }
fail:
while ( i-- > 0 )
- put_page_from_l1e(pl1e[i]);
+ put_page_from_l1e(pl1e[i], d);
unmap_domain_mem(pl1e);
return 0;
+/* Release every reference held by the L1 page table living in frame @page. */
static void free_l1_table(struct pfn_info *page)
{
+    /* Owner of the page table -- passed down so foreign maps are uncounted. */
+    struct domain *d = page->u.inuse.domain;
    unsigned long page_nr = page - frame_table;
    l1_pgentry_t *pl1e;
    int i;
    pl1e = map_domain_mem(page_nr << PAGE_SHIFT);
    for ( i = 0; i < ENTRIES_PER_L1_PAGETABLE; i++ )
-        put_page_from_l1e(pl1e[i]);
+        put_page_from_l1e(pl1e[i], d);
    unmap_domain_mem(pl1e);
}
{
l1_pgentry_t ol1e;
unsigned long _ol1e;
+ struct domain *d = current;
if ( unlikely(__get_user(_ol1e, (unsigned long *)pl1e) != 0) )
{
if ( unlikely(!update_l1e(pl1e, ol1e, nl1e)) )
{
- put_page_from_l1e(nl1e);
+ put_page_from_l1e(nl1e, d);
return 0;
}
- put_page_from_l1e(ol1e);
+ put_page_from_l1e(ol1e, d);
return 1;
}
if ( unlikely(!update_l1e(pl1e, ol1e, nl1e)) )
return 0;
- put_page_from_l1e(ol1e);
+ put_page_from_l1e(ol1e, d);
return 1;
}
}
-static inline int readonly_page_from_l1e(l1_pgentry_t l1e)
-{
- struct pfn_info *page = &frame_table[l1_pgentry_to_pagenr(l1e)];
- unsigned long l1v = l1_pgentry_val(l1e);
-
- if ( (l1v & _PAGE_RW) || !(l1v & _PAGE_PRESENT) ||
- !pfn_is_ram(l1v >> PAGE_SHIFT) )
- return 0;
- put_page_type(page);
- return 1;
-}
-
-/* Writable Pagetables */
+/*************************
+ * Writable Pagetables
+ */
ptwr_info_t ptwr_info[NR_CPUS] =
{ [ 0 ... NR_CPUS-1 ] =
nl1e = pl1e[i];
if (likely(l1_pgentry_val(nl1e) == l1_pgentry_val(ol1e)))
continue;
- if (likely((l1_pgentry_val(nl1e) ^ l1_pgentry_val(ol1e)) ==
- _PAGE_RW)) {
- if (likely(readonly_page_from_l1e(nl1e)))
- continue;
- }
if (unlikely(l1_pgentry_val(ol1e) & _PAGE_PRESENT))
- put_page_from_l1e(ol1e);
+ put_page_from_l1e(ol1e, current);
if (unlikely(!get_page_from_l1e(nl1e, current)))
BUG();
}
if (likely(l1_pgentry_val(ol1e) == l1_pgentry_val(nl1e)))
continue;
if (unlikely(l1_pgentry_val(ol1e) & _PAGE_PRESENT))
- put_page_from_l1e(ol1e);
+ put_page_from_l1e(ol1e, current);
if (unlikely(!get_page_from_l1e(nl1e, current)))
BUG();
}
clear_bit(smp_processor_id(), &wait_init_idle);
smp_threads_ready = 1;
smp_commence(); /* Tell other CPUs that state of the world is stable. */
- while (wait_init_idle)
+ while ( wait_init_idle != 0 )
{
cpu_relax();
barrier();
{
struct domain **pd;
unsigned long flags;
+ atomic_t old, new;
if ( !test_bit(DF_DYING, &d->flags) )
BUG();
/* May be already destructed, or get_domain() can race us. */
- if ( cmpxchg(&d->refcnt.counter, 0, DOMAIN_DESTRUCTED) != 0 )
+ _atomic_set(old, 0);
+ _atomic_set(new, DOMAIN_DESTRUCTED);
+ old = atomic_compareandswap(old, new, &d->refcnt);
+ if ( _atomic_read(old) != 0 )
return;
DPRINTK("Releasing task %u\n", d->domain);
#include <xen/config.h>
#include <xen/sched.h>
+/*
+ * Error-exit helper for the grant-table operations below: log a debug
+ * message, stash the negated errno value in the local 'rc', and jump to
+ * the common 'out' cleanup path. NB. the expansion site must provide an
+ * 'rc' variable and an 'out:' label.
+ */
+#define PIN_FAIL(_rc, _f, _a...) \
+    do { \
+        DPRINTK( _f, ## _a ); \
+        rc = -(_rc); \
+        goto out; \
+    } while ( 0 )
+
static inline void
check_tlb_flush(
active_grant_entry_t *a)
active_grant_entry_t *act;
grant_entry_t *sha;
long rc = 0;
+ unsigned long frame;
ld = current;
return -EINVAL;
}
- if ( unlikely((rd = find_domain_by_id(dom)) == NULL) )
+ if ( unlikely((rd = find_domain_by_id(dom)) == NULL) ||
+ unlikely(ld == rd) )
{
+ if ( rd != NULL )
+ put_domain(rd);
DPRINTK("Could not find domain %d\n", dom);
return -ESRCH;
}
act = &rd->grant_table->active[ref];
sha = &rd->grant_table->shared[ref];
+ spin_lock(&rd->grant_table->lock);
+
if ( act->status == 0 )
{
if ( unlikely(pin_flags == 0) )
if ( unlikely((sflags & GTF_type_mask) != GTF_permit_access) ||
unlikely(sdom != ld->domain) )
- {
- DPRINTK("Bad flags (%x) or dom (%d). (NB. expected dom %d)\n",
+ PIN_FAIL(EINVAL,
+ "Bad flags (%x) or dom (%d). (NB. expected dom %d)\n",
sflags, sdom, ld->domain);
- rc = -EINVAL;
- goto out;
- }
sflags |= GTF_reading;
if ( !(pin_flags & GNTPIN_readonly) )
{
sflags |= GTF_writing;
if ( unlikely(sflags & GTF_readonly) )
- {
- DPRINTK("Attempt to write-pin a read-only grant entry.\n");
- rc = -EINVAL;
- goto out;
- }
+ PIN_FAIL(EINVAL,
+ "Attempt to write-pin a r/o grant entry.\n");
}
/* Merge two 16-bit values into a 32-bit combined update. */
/* NB. prev_sflags is updated in place to seen value. */
if ( unlikely(cmpxchg_user((u32 *)&sha->flags, prev_scombo,
prev_scombo | GTF_writing)) )
- {
- DPRINTK("Fault while modifying shared flags and domid.\n");
- rc = -EINVAL;
- goto out;
- }
+ PIN_FAIL(EINVAL,
+ "Fault while modifying shared flags and domid.\n");
/* Did the combined update work (did we see what we expected?). */
if ( prev_scombo == scombo )
}
/* rmb(); */ /* not on x86 */
+ frame = sha->frame;
+ if ( unlikely(!pfn_is_ram(frame)) ||
+ unlikely(!((pin_flags & GNTPIN_readonly) ?
+ get_page(&frame_table[frame], rd) :
+ get_page_and_type(&frame_table[frame], rd,
+ PGT_writable_page))) )
+ {
+ clear_bit(_GTF_writing, &sha->flags);
+ clear_bit(_GTF_reading, &sha->flags);
+ PIN_FAIL(EINVAL,
+ "Could not pin the granted frame!\n");
+ }
act->status = pin_flags;
act->domid = sdom;
- act->frame = sha->frame;
+ act->frame = frame;
make_entry_mappable(rd->grant_table, act);
}
if ( unlikely((act->status &
(GNTPIN_wmap_mask|GNTPIN_rmap_mask)) != 0) )
- {
- DPRINTK("Attempt to deactivate a mapped g.e. (%x)\n", act->status);
- rc = -EINVAL;
- goto out;
- }
+ PIN_FAIL(EINVAL,
+ "Attempt to deactiv a mapped g.e. (%x)\n", act->status);
+
+ frame = act->frame;
+ if ( !(act->status & GNTPIN_readonly) )
+ put_page_type(&frame_table[frame]);
+ put_page(&frame_table[frame]);
act->status = 0;
make_entry_unmappable(rd->grant_table, act);
(unlikely((act->status & GNTPIN_wmap_mask) != 0) ||
(((pin_flags & GNTPIN_host_accessible) == 0) &&
unlikely((act->status & GNTPIN_rmap_mask) != 0))) )
- {
- DPRINTK("Attempt to reduce pinning of a mapped g.e. (%x,%x)\n",
+ PIN_FAIL(EINVAL,
+ "Attempt to reduce pinning of a mapped g.e. (%x,%x)\n",
pin_flags, act->status);
- rc = -EINVAL;
- goto out;
- }
/* Check for changes to host accessibility. */
if ( pin_flags & GNTPIN_host_accessible )
{
if ( !(act->status & GNTPIN_readonly) )
{
+ put_page_type(&frame_table[act->frame]);
check_tlb_flush(act);
clear_bit(_GTF_writing, &sha->flags);
}
prev_sflags = sflags;
if ( unlikely(prev_sflags & GTF_readonly) )
- {
- DPRINTK("Attempt to write-pin a read-only grant entry.\n");
- rc = -EINVAL;
- goto out;
- }
-
+ PIN_FAIL(EINVAL,
+ "Attempt to write-pin a r/o grant entry.\n");
+
+ if ( unlikely(!get_page_type(&frame_table[act->frame],
+ PGT_writable_page)) )
+ PIN_FAIL(EINVAL,
+ "Attempt to write-pin a unwritable page.\n");
+
/* NB. prev_sflags is updated in place to seen value. */
if ( unlikely(cmpxchg_user(&sha->flags, prev_sflags,
prev_sflags | GTF_writing)) )
- {
- DPRINTK("Fault while modifying shared flags.\n");
- rc = -EINVAL;
- goto out;
- }
+ PIN_FAIL(EINVAL,
+ "Fault while modifying shared flags.\n");
}
while ( prev_sflags != sflags );
}
(void)__put_user(act->frame, &uop->host_phys_addr);
out:
+ spin_unlock(&rd->grant_table->lock);
put_domain(rd);
return rc;
}
return rc;
}
+/*
+ * Count, via @rd's grant table, a mapping of @rd's frame @page made by
+ * domain @ld (read-only iff @readonly). Returns non-zero on success, 0 on
+ * failure (callers such as get_page_from_l1e() treat 0 as map failure).
+ * Currently an unimplemented stub that always fails.
+ */
+int
+gnttab_try_map(
+    struct domain *rd, struct domain *ld, struct pfn_info *page, int readonly)
+{
+    return 0;
+}
+
+/*
+ * Uncount a mapping of @page previously counted in @rd's grant table by
+ * @ld. Returns non-zero on success; 0 tells the caller to fall back to
+ * plain reference-count release. Currently a stub that always fails.
+ */
+int
+gnttab_try_unmap(
+    struct domain *rd, struct domain *ld, struct pfn_info *page, int readonly)
+{
+    return 0;
+}
+
int
grant_table_create(
struct domain *d)
SHARE_PFN_WITH_DOMAIN(virt_to_page(t->shared), d);
/* Okay, install the structure. */
+ wmb(); /* avoid races with lock-free access to d->grant_table */
d->grant_table = t;
return 0;
xmem_cache_init();
xmem_cache_sizes_init(max_page);
+ /*
+ * Create a domain-structure allocator. The SLAB_NO_REAP flag is essential!
+ * This is because in some situations a domain's reference count will be
+ * incremented by someone with no other handle on the structure -- this is
+ * inherently racy because the struct could be freed by the time that the
+ * count is incremented. By specifying 'no-reap' we ensure that, worst
+ * case, they increment some other domain's count, rather than corrupting
+ * a random field in a random structure!
+ * See, for example, arch/x86/memory.c:get_page_from_l1e().
+ */
domain_struct_cachep = xmem_cache_create(
"domain_cache", sizeof(struct domain),
- 0, SLAB_HWCACHE_ALIGN, NULL, NULL);
+ 0, SLAB_HWCACHE_ALIGN | SLAB_NO_REAP, NULL, NULL);
if ( domain_struct_cachep == NULL )
panic("No slab cache for task structs.");
unsigned long alloc_xenheap_pages(int order)
{
struct pfn_info *pg;
- int attempts = 0;
+ int i, attempts = 0;
retry:
if ( unlikely((pg = alloc_heap_pages(MEMZONE_XEN, order)) == NULL) )
goto no_memory;
+
memguard_unguard_range(page_to_virt(pg), 1 << (order + PAGE_SHIFT));
+
+ for ( i = 0; i < (1 << order); i++ )
+ {
+ pg[i].u.inuse.count_info = PGC_always_set;
+ pg[i].u.inuse.domain = NULL;
+ pg[i].u.inuse.type_info = 0;
+ }
+
return (unsigned long)page_to_virt(pg);
no_memory:
{
struct pfn_info *pg;
unsigned long mask, flushed_mask, pfn_stamp, cpu_stamp;
- int i;
+ int i, j;
ASSERT(!in_irq());
flushed_mask = 0;
for ( i = 0; i < (1 << order); i++ )
{
- pg[i].u.inuse.domain = NULL;
- pg[i].u.inuse.type_info = 0;
-
if ( (mask = (pg[i].u.free.cpu_mask & ~flushed_mask)) != 0 )
{
pfn_stamp = pg[i].tlbflush_timestamp;
- for ( i = 0; (mask != 0) && (i < smp_num_cpus); i++ )
+ for ( j = 0; (mask != 0) && (j < smp_num_cpus); j++ )
{
- if ( mask & (1<<i) )
+ if ( mask & (1<<j) )
{
- cpu_stamp = tlbflush_time[i];
+ cpu_stamp = tlbflush_time[j];
if ( !NEED_FLUSH(cpu_stamp, pfn_stamp) )
- mask &= ~(1<<i);
+ mask &= ~(1<<j);
}
}
flushed_mask |= mask;
}
}
+
+ pg[i].u.inuse.count_info = PGC_always_set;
+ pg[i].u.inuse.domain = NULL;
+ pg[i].u.inuse.type_info = 0;
}
if ( d == NULL )
{
pg[i].u.inuse.domain = d;
wmb(); /* Domain pointer must be visible before updating refcnt. */
- pg[i].u.inuse.count_info = PGC_allocated | 1;
+ pg[i].u.inuse.count_info |= PGC_allocated | 1;
list_add_tail(&pg[i].list, &d->page_list);
}
if ( unlikely(IS_XEN_HEAP_FRAME(pg)) )
{
spin_lock_recursive(&d->page_alloc_lock);
+
for ( i = 0; i < (1 << order); i++ )
list_del(&pg[i].list);
+
d->xenheap_pages -= 1 << order;
drop_dom_ref = (d->xenheap_pages == 0);
+
spin_unlock_recursive(&d->page_alloc_lock);
}
else if ( likely(d != NULL) )
for ( i = 0; i < (1 << order); i++ )
{
- pg[i].tlbflush_timestamp = tlbflush_clock;
- pg[i].u.inuse.count_info = 0;
- pg[i].u.free.cpu_mask = 1 << d->processor;
+ pg[i].tlbflush_timestamp = tlbflush_clock;
+ pg[i].u.free.cpu_mask = 1 << d->processor;
list_del(&pg[i].list);
}
#define __ARCH_X86_ATOMIC__
#include <xen/config.h>
-
-/*
- * Atomic operations that C can't guarantee us. Useful for
- * resource counting etc..
- */
+#include <asm/system.h>
#ifdef CONFIG_SMP
#define LOCK "lock ; "
#endif
/*
- * Make sure gcc doesn't try to be clever and move things around
- * on us. We need to use _exactly_ the address the user gave us,
- * not some alias that contains the same information.
+ * NB. I've pushed the volatile qualifier into the operations. This allows
+ * fast accessors such as _atomic_read() and _atomic_set() which don't give
+ * the compiler a fit.
*/
-typedef struct { volatile int counter; } atomic_t;
+typedef struct { int counter; } atomic_t;
#define ATOMIC_INIT(i) { (i) }
*
* Atomically reads the value of @v. Note that the guaranteed
* useful range of an atomic_t is only 24 bits.
- */
-#define atomic_read(v) ((v)->counter)
+ */
+#define _atomic_read(v) ((v).counter)
+#define atomic_read(v) (*(volatile int *)&((v)->counter))
/**
* atomic_set - set atomic variable
* Atomically sets the value of @v to @i. Note that the guaranteed
* useful range of an atomic_t is only 24 bits.
*/
-#define atomic_set(v,i) (((v)->counter) = (i))
+#define _atomic_set(v,i) (((v).counter) = (i))
+#define atomic_set(v,i) (*(volatile int *)&((v)->counter) = (i))
/**
* atomic_add - add integer to atomic variable
{
__asm__ __volatile__(
LOCK "addl %1,%0"
- :"=m" (v->counter)
- :"ir" (i), "m" (v->counter));
+ :"=m" (*(volatile int *)&v->counter)
+ :"ir" (i), "m" (*(volatile int *)&v->counter));
}
/**
{
__asm__ __volatile__(
LOCK "subl %1,%0"
- :"=m" (v->counter)
- :"ir" (i), "m" (v->counter));
+ :"=m" (*(volatile int *)&v->counter)
+ :"ir" (i), "m" (*(volatile int *)&v->counter));
}
/**
__asm__ __volatile__(
LOCK "subl %2,%0; sete %1"
- :"=m" (v->counter), "=qm" (c)
- :"ir" (i), "m" (v->counter) : "memory");
+ :"=m" (*(volatile int *)&v->counter), "=qm" (c)
+ :"ir" (i), "m" (*(volatile int *)&v->counter) : "memory");
return c;
}
{
__asm__ __volatile__(
LOCK "incl %0"
- :"=m" (v->counter)
- :"m" (v->counter));
+ :"=m" (*(volatile int *)&v->counter)
+ :"m" (*(volatile int *)&v->counter));
}
/**
{
__asm__ __volatile__(
LOCK "decl %0"
- :"=m" (v->counter)
- :"m" (v->counter));
+ :"=m" (*(volatile int *)&v->counter)
+ :"m" (*(volatile int *)&v->counter));
}
/**
__asm__ __volatile__(
LOCK "decl %0; sete %1"
- :"=m" (v->counter), "=qm" (c)
- :"m" (v->counter) : "memory");
+ :"=m" (*(volatile int *)&v->counter), "=qm" (c)
+ :"m" (*(volatile int *)&v->counter) : "memory");
return c != 0;
}
__asm__ __volatile__(
LOCK "incl %0; sete %1"
- :"=m" (v->counter), "=qm" (c)
- :"m" (v->counter) : "memory");
+ :"=m" (*(volatile int *)&v->counter), "=qm" (c)
+ :"m" (*(volatile int *)&v->counter) : "memory");
return c != 0;
}
__asm__ __volatile__(
LOCK "addl %2,%0; sets %1"
- :"=m" (v->counter), "=qm" (c)
- :"ir" (i), "m" (v->counter) : "memory");
+ :"=m" (*(volatile int *)&v->counter), "=qm" (c)
+ :"ir" (i), "m" (*(volatile int *)&v->counter) : "memory");
return c;
}
+/*
+ * Atomic compare-and-swap on an atomic_t: if *v == old then *v = new.
+ * Returns the value of *v observed by the CMPXCHG (equal to @old iff the
+ * swap took place). Operands are passed by value to match the atomic_* API.
+ */
+static __inline__ atomic_t atomic_compareandswap(
+    atomic_t old, atomic_t new, atomic_t *v)
+{
+    atomic_t rc;
+    rc.counter =
+        __cmpxchg(&v->counter, old.counter, new.counter, sizeof(int));
+    return rc;
+}
+
/* Atomic operations are already serializing on x86 */
#define smp_mb__before_atomic_dec() barrier()
#define smp_mb__after_atomic_dec() barrier()
/* Cleared when the owning guest 'frees' this page. */
#define _PGC_allocated 29
#define PGC_allocated (1<<_PGC_allocated)
- /* 28-bit count of references to this frame. */
-#define PGC_count_mask ((1<<29)-1)
-
+ /* This bit is always set, guaranteeing that the count word is never zero. */
+#define _PGC_always_set 28
+#define PGC_always_set (1<<_PGC_always_set)
+ /* 27-bit count of references to this frame. */
+#define PGC_count_mask ((1<<28)-1)
/* We trust the slab allocator in slab.c, and our use of it. */
#define PageSlab(page) (1)
wmb(); /* install valid domain ptr before updating refcnt. */ \
spin_lock(&(_dom)->page_alloc_lock); \
/* _dom holds an allocation reference */ \
- (_pfn)->u.inuse.count_info = PGC_allocated | 1; \
+ ASSERT((_pfn)->u.inuse.count_info == PGC_always_set); \
+ (_pfn)->u.inuse.count_info |= PGC_allocated | 1; \
if ( unlikely((_dom)->xenheap_pages++ == 0) ) \
get_knownalive_domain(_dom); \
list_add_tail(&(_pfn)->list, &(_dom)->xenpage_list); \
unlikely((nx & PGC_count_mask) == 0) || /* Count overflow? */
unlikely(p != domain) ) /* Wrong owner? */
{
- DPRINTK("Error pfn %08lx: ed=%p(%u), sd=%p(%u),"
- " caf=%08x, taf=%08x\n",
- page_to_pfn(page), domain, domain->domain,
- p, (p && !((x & PGC_count_mask) == 0))?p->domain:999,
+ DPRINTK("Error pfn %08lx: ed=%p, sd=%p, caf=%08x, taf=%08x\n",
+ page_to_pfn(page), domain, p,
x, page->u.inuse.type_info);
return 0;
}
void ptwr_flush_inactive(void);
int ptwr_do_page_fault(unsigned long);
-static always_inline void
-__cleanup_writable_pagetable(
- const int what)
-{
- int cpu = smp_processor_id();
-
- if (what & PTWR_CLEANUP_ACTIVE)
- if (ptwr_info[cpu].disconnected != ENTRIES_PER_L2_PAGETABLE)
- ptwr_reconnect_disconnected(0L);
- if (what & PTWR_CLEANUP_INACTIVE)
- if (ptwr_info[cpu].writable_idx)
- ptwr_flush_inactive();
-}
-
-static always_inline void
-cleanup_writable_pagetable(
- struct domain *d, const int what)
-{
- if ( unlikely(VM_ASSIST(d, VMASST_TYPE_writable_pagetables)) )
- __cleanup_writable_pagetable(what);
-}
+/*
+ * Flush this CPU's writable-pagetable state, as selected by @_what:
+ * reconnect a disconnected L2 entry (PTWR_CLEANUP_ACTIVE) and/or flush
+ * batched inactive updates (PTWR_CLEANUP_INACTIVE).
+ */
+#define __cleanup_writable_pagetable(_what) \
+do { \
+    int cpu = smp_processor_id(); \
+    if ((_what) & PTWR_CLEANUP_ACTIVE) \
+        if (ptwr_info[cpu].disconnected != ENTRIES_PER_L2_PAGETABLE) \
+            ptwr_reconnect_disconnected(0L); \
+    if ((_what) & PTWR_CLEANUP_INACTIVE) \
+        if (ptwr_info[cpu].writable_idx) \
+            ptwr_flush_inactive(); \
+} while ( 0 )
+
+/* As above, but only if domain @_d has the writable-pagetable assist enabled. */
+#define cleanup_writable_pagetable(_d, _w) \
+    do { \
+        if ( unlikely(VM_ASSIST((_d), VMASST_TYPE_writable_pagetables)) ) \
+            __cleanup_writable_pagetable(_w); \
+    } while ( 0 )
#endif /* __ASM_X86_MM_H__ */
#ifndef __ASM_SMP_H
#define __ASM_SMP_H
-/*
- * We need the APIC definitions automatically as part of 'smp.h'
- */
#ifndef __ASSEMBLY__
#include <xen/config.h>
-/*#include <xen/threads.h>*/
-#include <asm/ptrace.h>
-#endif
-
-#ifdef CONFIG_X86_LOCAL_APIC
-#ifndef __ASSEMBLY__
#include <asm/fixmap.h>
-#include <asm/bitops.h>
#include <asm/mpspec.h>
-#ifdef CONFIG_X86_IO_APIC
#include <asm/io_apic.h>
-#endif
#include <asm/apic.h>
#endif
-#endif
#ifdef CONFIG_SMP
#ifndef __ASSEMBLY__
extern int smp_num_siblings;
extern int cpu_sibling_map[];
-extern void smp_flush_tlb(void);
-extern void smp_message_irq(int cpl, void *dev_id, struct pt_regs *regs);
-extern void smp_send_reschedule(int cpu);
-extern void smp_invalidate_rcv(void); /* Process an NMI */
-extern void (*mtrr_hook) (void);
-
/*
* On x86 all CPUs are mapped 1:1 to the APIC space.
* This simplifies scheduling and IPI sending and
case 1:
__asm__ __volatile__("xchgb %b0,%1"
:"=q" (x)
- :"m" (*__xg(ptr)), "0" (x)
+ :"m" (*__xg((volatile void *)ptr)), "0" (x)
:"memory");
break;
case 2:
__asm__ __volatile__("xchgw %w0,%1"
:"=r" (x)
- :"m" (*__xg(ptr)), "0" (x)
+ :"m" (*__xg((volatile void *)ptr)), "0" (x)
:"memory");
break;
#if defined(__i386__)
case 4:
__asm__ __volatile__("xchgl %0,%1"
:"=r" (x)
- :"m" (*__xg(ptr)), "0" (x)
+ :"m" (*__xg((volatile void *)ptr)), "0" (x)
:"memory");
break;
#elif defined(__x86_64__)
case 4:
__asm__ __volatile__("xchgl %k0,%1"
:"=r" (x)
- :"m" (*__xg(ptr)), "0" (x)
+ :"m" (*__xg((volatile void *)ptr)), "0" (x)
:"memory");
break;
case 8:
__asm__ __volatile__("xchgq %0,%1"
:"=r" (x)
- :"m" (*__xg(ptr)), "0" (x)
+ :"m" (*__xg((volatile void *)ptr)), "0" (x)
:"memory");
break;
#endif
case 1:
__asm__ __volatile__(LOCK_PREFIX "cmpxchgb %b1,%2"
: "=a"(prev)
- : "q"(new), "m"(*__xg(ptr)), "0"(old)
+ : "q"(new), "m"(*__xg((volatile void *)ptr)), "0"(old)
: "memory");
return prev;
case 2:
__asm__ __volatile__(LOCK_PREFIX "cmpxchgw %w1,%2"
: "=a"(prev)
- : "q"(new), "m"(*__xg(ptr)), "0"(old)
+ : "r"(new), "m"(*__xg((volatile void *)ptr)), "0"(old)
: "memory");
return prev;
#if defined(__i386__)
case 4:
__asm__ __volatile__(LOCK_PREFIX "cmpxchgl %1,%2"
: "=a"(prev)
- : "q"(new), "m"(*__xg(ptr)), "0"(old)
+ : "r"(new), "m"(*__xg((volatile void *)ptr)), "0"(old)
: "memory");
return prev;
#elif defined(__x86_64__)
case 4:
__asm__ __volatile__(LOCK_PREFIX "cmpxchgl %k1,%2"
: "=a"(prev)
- : "q"(new), "m"(*__xg(ptr)), "0"(old)
+ : "r"(new), "m"(*__xg((volatile void *)ptr)), "0"(old)
: "memory");
return prev;
case 8:
__asm__ __volatile__(LOCK_PREFIX "cmpxchgq %1,%2"
: "=a"(prev)
- : "q"(new), "m"(*__xg(ptr)), "0"(old)
+ : "r"(new), "m"(*__xg((volatile void *)ptr)), "0"(old)
: "memory");
return prev;
#endif
#ifndef __XEN_GRANT_H__
#define __XEN_GRANT_H__
+#include <xen/config.h>
+#include <xen/mm.h>
#include <hypervisor-ifs/grant_table.h>
/* Active grant entry - used for shadowing GTF_permit_access grants. */
} grant_table_t;
/* Start-of-day system initialisation. */
-void grant_table_init(void);
+void grant_table_init(
+ void);
/* Create/destroy per-domain grant table context. */
-int grant_table_create(struct domain *d);
-void grant_table_destroy(struct domain *d);
+int grant_table_create(
+ struct domain *d);
+void grant_table_destroy(
+ struct domain *d);
+
+/* Create/destroy host-CPU mappings via a grant-table entry. */
+int gnttab_try_map(
+ struct domain *rd, struct domain *ld, struct pfn_info *page, int readonly);
+int gnttab_try_unmap(
+ struct domain *rd, struct domain *ld, struct pfn_info *page, int readonly);
#endif /* __XEN_GRANT_H__ */
#ifndef __SCHED_H__
#define __SCHED_H__
+#define STACK_SIZE (2*PAGE_SIZE)
+#define MAX_DOMAIN_NAME 16
+
#include <xen/config.h>
#include <xen/types.h>
#include <xen/spinlock.h>
#include <asm/processor.h>
#include <hypervisor-ifs/hypervisor-if.h>
#include <hypervisor-ifs/dom0_ops.h>
-#include <xen/grant_table.h>
#include <xen/list.h>
#include <xen/time.h>
#include <xen/ac_timer.h>
#include <xen/delay.h>
#include <asm/atomic.h>
-
-#define STACK_SIZE (2*PAGE_SIZE)
#include <asm/current.h>
-
-#define MAX_DOMAIN_NAME 16
+#include <xen/spinlock.h>
+#include <xen/grant_table.h>
extern unsigned long volatile jiffies;
extern rwlock_t tasklist_lock;
-#include <xen/spinlock.h>
-
struct domain;
typedef struct event_channel_st
* Use this when you don't have an existing reference to @d. It returns
* FALSE if @d is being destructed.
*/
-static inline int get_domain(struct domain *d)
+static always_inline int get_domain(struct domain *d)
 {
-    atomic_inc(&d->refcnt);
-    return !(atomic_read(&d->refcnt) & DOMAIN_DESTRUCTED);
+    /*
+     * CAS loop: refuse to take a reference once DOMAIN_DESTRUCTED is set,
+     * so we never resurrect a refcount the destruction path has already
+     * claimed. Retry while another CPU changes refcnt under our feet.
+     */
+    atomic_t old, new, seen = d->refcnt;
+    do
+    {
+        old = seen;
+        if ( unlikely(_atomic_read(old) & DOMAIN_DESTRUCTED) )
+            return 0;
+        _atomic_set(new, _atomic_read(old) + 1);
+        seen = atomic_compareandswap(old, new, &d->refcnt);
+    }
+    while ( unlikely(_atomic_read(seen) != _atomic_read(old)) );
+    return 1;
 }
/*